*===============================================================================
*
*					WORKER BELIEFS ABOUT OUTSIDE OPTIONS
*		(c)	Simon Jaeger, Christopher Roth, Nina Roussille, Benjamin Schoefer
*							  2023 December 5
*						   	   	 Local Data 
*
*===============================================================================


********************************************************************************
*							Cleaning Robustness Survey 	 					   *
******************************************************************************** 


* Load the raw robustness-survey data, replacing whatever is in memory.
* This is the raw data; the only change made to it is that identifying
* information (e.g. IP addresses and exact location) has been removed.
use "$exp_data/raw_robust", clear

* Convert string variables with purely numeric content to numeric storage.
* The four variables in skip_vars are deliberately left untouched.
unab all_vars : _all
local skip_vars pretaxwageearning point_more pretreatment_oo post_outsideoption
local to_convert : list all_vars - skip_vars
destring `to_convert', replace

* Employment status before any restriction, including missing values
* (original note: only the 946 + 293 in full- or part-time employment proceeded).
tabulate employmentstatus, missing

* Self-employment (original note: 62 are self-employed);
* only observations with selfemployed equal to 2 are retained.
tabulate selfemployed
keep if selfemployed == 2

* Retain employment-status codes 1 and 2 only.
keep if inlist(employmentstatus, 1, 2)

* Indicator for employment-status code 1 (missing when the status is missing).
gen ft_employed = (employmentstatus == 1) if employmentstatus != .
label variable ft_employed "Full-time Employed"

* Indicator for gender code 2 (missing when gender is missing).
gen female = (gender == 2) if gender != .
label variable female "Female"

* Age in levels: 2021 minus the value stored in age
* (presumably a year of birth -- TODO confirm against the questionnaire).
gen age_level = 2021 - age
label variable age_level "Age"

summarize age_level, detail

* Residency: East/West indicators derived from the residency_1 codes.
* Both indicators are 0 for any code not listed, including missing codes.
gen west = inlist(residency_1, 1, 78, 319, 324, 327, 397, 488, 597, 670, 739)
gen east = inlist(residency_1, 279, 282, 380, 683, 710, 770)
label variable east "East"

* Higher education: education2 codes of 6 or above are flagged.
gen uni_degree = (education2 >= 6) if education2 != .
tabulate uni_degree
label variable uni_degree "University Degree"

* Harmonise hand-identified input formats in wachterquestion before converting
* it to numeric: the i-th entry of raw_entries is rewritten as the i-th entry
* of clean_entries.
local raw_entries 1.600 11,50 9,50 2200,00 2884,37 3500,00 4.900
local clean_entries 1600 11.50 9.50 2200 2884 3500 4900
local n_entries : word count `raw_entries'
forvalues i = 1/`n_entries' {
	local from : word `i' of `raw_entries'
	local to : word `i' of `clean_entries'
	replace wachterquestion = "`to'" if wachterquestion == "`from'"
}

* Convert to numeric; with the force option any entry that is still
* non-numeric becomes missing.
destring leveloo level00_reminder wachterquestion, replace force

* Wage categories: translate each pretaxwageearncat code into a euro amount
* (earn_alt). The i-th code in cat_codes receives the i-th amount in cat_euros.
* NOTE(review): code 23 has no entry, so earn_alt stays missing for it --
* presumably that code does not occur in the survey; confirm.
local cat_codes 20 21 22 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
local cat_euros 250 750 1250 1750 2250 2750 3250 3750 4250 4750 5250 5750 6500 7500 8500 9500 12500 17500

gen earn_alt = .
local n_cats : word count `cat_codes'
forvalues i = 1/`n_cats' {
	local code : word `i' of `cat_codes'
	local euro : word `i' of `cat_euros'
	replace earn_alt = `euro' if pretaxwageearncat == `code'
}

label variable pretaxwageearncts "Gross Monthly Labor Income"

* Pay rank question: combine the two versions of each item into one variable.
* For items 1-3 (less / same / more) the employer_payrank_occ_* answer is used
* and, where that is missing, the employer_payrank_* answer fills in.
local item = 0
foreach level in less same more {
	local ++item
	gen payrank_`level'_comb = employer_payrank_occ_`item'
	replace payrank_`level'_comb = employer_payrank_`item' if payrank_`level'_comb == .
}
  
* Drop respondents with a missing answer on any of three key questions
drop if tarif_ja_nein == . | payrank_less_comb == . | wachterquestion == .
* 996 qualified participants

* Reported earnings whose category does not correspond to the precisely
* reported amount: diff_earn is the exact amount minus the category amount.
gen diff_earn = pretaxwageearncts - earn_alt

* Each rule reads: first category, last category, tolerance in euro.
* Lower side: drop when the exact amount falls short by more than the tolerance.
foreach rule in "20 32 500" "33 33 750" "34 36 1000" "37 37 3000" "38 38 5000" {
	local first : word 1 of `rule'
	local last : word 2 of `rule'
	local tol : word 3 of `rule'
	drop if diff_earn < -`tol' & inrange(pretaxwageearncat, `first', `last')
}

* Upper side: drop when the exact amount exceeds by more than the tolerance.
* Stata treats missing as larger than any number, so a missing diff_earn in
* one of these categories also satisfies the condition and is dropped.
foreach rule in "20 31 500" "32 32 750" "33 35 1000" "36 36 3000" "37 37 5000" {
	local first : word 1 of `rule'
	local last : word 2 of `rule'
	local tol : word 3 of `rule'
	drop if diff_earn > `tol' & inrange(pretaxwageearncat, `first', `last')
}
* 937 qualified participants

* Implausibly low or high answers (the upper bounds also remove missing values,
* because missing compares as larger than any number)
drop if pretaxwageearncts < 170 | pretaxwageearncts >= 25000
drop if wachterquestion <= 100 | wachterquestion > 25000
* 907 qualified participants



*** Alternative Elicitation

* Wage premium: own gross wage minus leveloo, falling back to level00_reminder
* where leveloo is missing.
gen wage_premium_alt = pretaxwageearncts - cond(missing(leveloo), level00_reminder, leveloo)

tabulate wage_premium_alt

* Winsorize outliers at -3500 and +3500; missing values remain missing
replace wage_premium_alt = clip(wage_premium_alt, -3500, 3500)

tabulate wage_premium_alt

* The wage change is the premium with the sign flipped
gen wage_change_alt = -wage_premium_alt


** SOEP elicitation
* wagepremium_soep is 0 when gsoepperson_moreless is 0. Answer codes 4 to 15
* of oopersongsoep_less map to the positive euro amounts below, and the same
* codes of oopersongsoep_more map to the corresponding negative amounts.
* The less-loop runs first, so a more-answer overrides a less-answer if both
* are present.
local soep_euros 25 75 150 250 350 450 625 875 1250 1750 2500 3500

gen wagepremium_soep = 0 if gsoepperson_moreless == 0

local code = 4
foreach euro of local soep_euros {
	replace wagepremium_soep = `euro' if oopersongsoep_less == `code'
	local ++code
}

local code = 4
foreach euro of local soep_euros {
	replace wagepremium_soep = -`euro' if oopersongsoep_more == `code'
	local ++code
}

* The wage change is the premium with the sign flipped
gen wage_change_soep = -wagepremium_soep

* Wage changes as a share of the own gross monthly wage (1 = 100%)
gen pct_wage_change_soep = wage_change_soep/pretaxwageearncts
gen pct_wage_change_alt = wage_change_alt/pretaxwageearncts

* Labels for the euro-denominated measures. The labels of the percentage
* measures are assigned once, further below.
* FIX: a second label statement used to overwrite the label of wage_change_alt
* with the text meant for the percentage wage premium; that text is now
* attached to pct_wage_premium_alt after it has been created.
la var wagepremium_soep "Perceived Wage Premium (in Euro): SOEP elic."
la var wage_premium_alt "Perceived Wage Premium (in Euro): Alt. elic."
la var wage_change_soep "Own Wage Change (in Euro): Main Elicitation"
la var wage_change_alt "Own Wage Change (in Euro): Alt. Elicitation"

* 1 if the respondent has a SOEP-elicitation answer, 0 otherwise
gen soepelicitation = wagepremium_soep!=.

* Combined measures: SOEP elicitation where available, otherwise the alternative one
gen wage_premium_combined = wagepremium_soep
replace wage_premium_combined = wage_premium_alt if wage_premium_combined==.

gen wage_change_combined = wage_change_soep
replace wage_change_combined = wage_change_alt if wage_change_combined==.

gen pct_wage_change_combined = (wage_change_combined/pretaxwageearncts)

* winsorize wage changes at -+100%
* FIX: missing compares as larger than any number in Stata, so the upper cap
* needs an explicit missing guard; otherwise missing values are recoded to 1.
replace pct_wage_change_combined = 1 if pct_wage_change_combined>1 & !missing(pct_wage_change_combined)
replace pct_wage_change_combined = -1 if pct_wage_change_combined<-1 // original note: 8 obs overall affected (count predates the missing guard)

gen pct_wage_premium_alt = (wage_premium_alt/pretaxwageearncts)
replace pct_wage_premium_alt = -1 if pct_wage_premium_alt<-1
la var pct_wage_premium_alt "Perceived Wage Premium (as perc. of wage): alt elic."

* Splits of the combined measure by question wording (string values are the
* German survey texts and must match exactly)
gen pct_wage_change_withinocc = pct_wage_change_combined if occupation=="in Ihrem Beruf zu finden."
gen pct_wage_change_general = pct_wage_change_combined if occupation=="zu finden."

* Every value of timing other than "drei" (including an empty string) is
* assigned to the 12-month variable
gen pct_wage_change_3m = pct_wage_change_combined if timing=="drei"
gen pct_wage_change_12m = pct_wage_change_combined if timing!="drei"

la var pct_wage_change_alt "Own Wage Change as \% of Wage: Alt. Elicitation"
la var pct_wage_change_soep "Own Wage Change as \% of Wage: Main Elicitation"
la var pct_wage_change_withinocc "Own Wage Change as \% of Wage: Occupation-specific"
la var pct_wage_change_general "Own Wage Change as \% of Wage: Not Occupation-specific"
la var pct_wage_change_3m "Own Wage Change as \% of Wage: 3-Month Horizon"
la var pct_wage_change_12m "Own Wage Change as \% of Wage: 12-Month Horizon"

* NOTE(review): occ is not created in the visible part of this file;
* presumably it is supplied by the raw data as a 0/1 indicator -- confirm.
gen payrank_less_comb_occ = payrank_less_comb if occ==1
gen payrank_less_comb_general = payrank_less_comb if occ==0
la var payrank_less_comb_occ "Fraction of Other Employers Paying Less: Occupation-specific"
* FIX: the general (occ==0) variable was labelled "Occupation-specific"
la var payrank_less_comb_general "Fraction of Other Employers Paying Less: Not Occupation-specific"


** Create SWITCH IN / OUT variable 
* Builds switchin_* and switchout_* (suffixes soep, alt, combined) as shares,
* i.e. the recoded answers divided by 100, capped at -0.625 and +0.625.

* recode soep answers to percentage points 
* The two renames make the variable names fit the pattern used in the loops
* below: switch + in/out + gsoep + more/less, which expands to
* switchingsoepmore, switchingsoepless, switchoutgsoepmore, switchoutgsoepless.
rename switchingsopless switchingsoepless // there is a typo here
rename switchingsopmore switchingsoepmore // there is a typo here	
* Recode the direction answer from 1/2 to -1/+1. The order of the two lines
* matters: recoding 2 first would turn those observations into -1 as well.
* NOTE(review): only switchoutmorele is recoded here; the other three *morele
* variables used below are presumably already coded -1/0/1 -- confirm.
replace switchoutmorele = -1 if switchoutmorele == 1 
replace switchoutmorele = 1 if switchoutmorele == 2 
foreach yar in "in" "out"{
	foreach var in "more" "less"{
		// answer codes 1-9 become the values 1, 3.5, 7.5, ..., 87.5
		// (presumably bracket midpoints in percent -- confirm); code 1 maps to itself
		replace switch`yar'gsoep`var' = 1 if switch`yar'gsoep`var' == 1
		replace switch`yar'gsoep`var' = 3.5 if switch`yar'gsoep`var' == 2
		replace switch`yar'gsoep`var' = 7.5 if switch`yar'gsoep`var' == 3
		replace switch`yar'gsoep`var' = 12.5 if switch`yar'gsoep`var' == 4
		replace switch`yar'gsoep`var' = 17.5 if switch`yar'gsoep`var' == 5
		replace switch`yar'gsoep`var' = 25 if switch`yar'gsoep`var' == 6
		replace switch`yar'gsoep`var' = 40 if switch`yar'gsoep`var' == 7
		replace switch`yar'gsoep`var' = 62.5 if switch`yar'gsoep`var' == 8
		replace switch`yar'gsoep`var' = 87.5 if switch`yar'gsoep`var' == 9		
	}		
}
su switchingsoepmore switchoutgsoepless switchingsoepless switchoutgsoepmore 

* combined measure of soep-elicitation answers	
* For each direction (in, out) and elicitation (soep, alt) a signed measure is
* built: the more-answer when the direction variable is 1, minus the
* less-answer when it is -1, and the direction value itself otherwise.
foreach yar in "in" "out"{
	foreach var in "soep" "alt"{
		// the soep variables carry the infix gsoep, the alt variables carry none
		if "`var'" == "soep"{
			local name "gsoep"
		}
		else{
			local name ""
		}
		gen switch`yar'_`var' = switch`yar'`name'morele // this gives us the zeros 
		replace switch`yar'_`var' = switch`yar'`name'more if switch`yar'`name'morele == 1
		replace switch`yar'_`var' = -switch`yar'`name'less if switch`yar'`name'morele == -1
		// divide by 100 to express the measure as a share
		replace switch`yar'_`var' = switch`yar'_`var' / 100		
	}	
	// combined measure: the alt value, filled with the soep value where alt is missing
	gen switch`yar'_combined = switch`yar'_alt
	replace switch`yar'_combined = switch`yar'_soep if switch`yar'_combined == .
	* deal with outliers: keep but set to .625
	foreach var in "soep" "alt" "combined"{
		// the missing guard keeps missing values from being recoded to 0.625
		replace switch`yar'_`var' = 0.625 if switch`yar'_`var' >=0.625  & switch`yar'_`var' !=.
		replace switch`yar'_`var' = -0.625 if  switch`yar'_`var' <=-0.625  & switch`yar'_`var' !=.
		table switch`yar'_`var' // 15 obs in combined at +-62.5 
	}
}


** wage change for the layoff question
* Difference between the wachterquestion answer and the current gross wage,
* in euro and as a share of the current wage (1 = 100%).

gen wage_change_masslayoff = wachterquestion - pretaxwageearncts
gen pct_wage_change_masslayoff = (wage_change_masslayoff/pretaxwageearncts)

* winsorize wage change at -+ 100%
* Missing compares as larger than any number in Stata, so the upper cap carries
* an explicit missing guard (as the euro-level winsorization of
* wage_premium_alt does); without it a missing value would be recoded to 1.
replace pct_wage_change_masslayoff = -1 if pct_wage_change_masslayoff < -1
replace pct_wage_change_masslayoff = 1 if pct_wage_change_masslayoff > 1 & !missing(pct_wage_change_masslayoff) // 12 obs overall affected
la var pct_wage_change_masslayoff "Own Wage Change (in percent): Mass Layoff"


** Median Belief Question

* Inflation-adjusted wages: the factors 1.015 and 1.005 correspond to
* 1.5% (2019) and 0.5% (2020)
gen kldb_3_median_wage_adj = kldb_3_median_wage * 1.015 * 1.005

* Lists occupations that have no observed median wage
* (original note: for all jobs, there is a median wage observed)
tabulate occupation_drilldown_3 if kldb_3_median_wage == . & !missing(occupation_drilldown_3)

* Incentivized median estimate: inspect the raw answers first
tabulate belief_occu_inc

* Dealing with misspellings: the i-th entry of raw_entries is rewritten as the
* i-th entry of clean_entries
local raw_entries 1.600 260ü 2884,37 3.100 3.600,00 5,05
local clean_entries 1600 2600 2884.37 3100 3600 5.05
local n_entries : word count `raw_entries'
forvalues i = 1/`n_entries' {
	local from : word `i' of `raw_entries'
	local to : word `i' of `clean_entries'
	replace belief_occu_inc = "`to'" if belief_occu_inc == "`from'"
}
destring belief_occu_inc, replace force
* Assumed misspecification: answers of 40000 or more are treated as annual input
replace belief_occu_inc = belief_occu_inc/12 if belief_occu_inc >= 40000

* Lower and upper bar: a band of 5% on either side of the stated belief
gen belief_occu_inc_low = 0.95 * belief_occu_inc
gen belief_occu_inc_high = 1.05 * belief_occu_inc

* 1 when the adjusted median wage lies inside the band; missing otherwise
* (the variable never takes the value 0)
gen correct_inc = 1 if inrange(kldb_3_median_wage_adj, belief_occu_inc_low, belief_occu_inc_high) & belief_occu_inc != .

tabulate correct_inc

* Non-incentivized median estimate: inspect the raw answers first
tabulate belief_occu

* Hand-coded input formats (a stated range is replaced by 4500)
replace belief_occu = "4000" if belief_occu == "4.000"
replace belief_occu = "4500" if belief_occu == "4000-5000"

destring belief_occu, replace force
* Assumed misspecification: the answer 150000 is treated as annual input (150000/12)
replace belief_occu = 12500 if belief_occu == 150000

* Lower and upper bar: a band of 5% on either side of the stated belief
gen belief_occu_low = 0.95 * belief_occu
gen belief_occu_high = 1.05 * belief_occu

* 1 when the adjusted median wage lies inside the band; missing otherwise
gen correct_noninc = 1 if inrange(kldb_3_median_wage_adj, belief_occu_low, belief_occu_high) & belief_occu != .

* Merging beliefs: the incentivized answer where present, otherwise the
* non-incentivized one
gen beliefs_merged = cond(belief_occu_inc != ., belief_occu_inc, belief_occu)

* Creating variables to 'over': 0 when a non-incentivized answer exists,
* 1 when only an incentivized answer exists, missing when neither exists
gen incentives = cond(belief_occu != ., 0, cond(belief_occu_inc != ., 1, .))

* Absolute estimate mistakes: adjusted median wage minus the merged belief
gen abs_median_fail = kldb_3_median_wage_adj - beliefs_merged if beliefs_merged != .

* Delete 10% outliers: values below the 5th or above the 95th percentile are
* set to missing
summarize abs_median_fail, detail
local cut_low `r(p5)'
local cut_high `r(p95)'
replace abs_median_fail = . if abs_median_fail < `cut_low' | abs_median_fail > `cut_high'

* Percentage estimate mistakes, only for observations that survived the
* trimming above. Note the sign: belief minus median, the reverse of
* abs_median_fail.
gen perc_median_fail = (beliefs_merged - kldb_3_median_wage_adj)/kldb_3_median_wage_adj if beliefs_merged != . & abs_median_fail != .
summarize perc_median_fail, detail


** Ordering helpers so that the soep elicitation comes out as the first category
* The indicators are negated so that the value 1 sorts before the value 0.
gen soep_order = -soepelicitation
* 1 when timing is "zwölf", 0 otherwise
gen timing_order = (timing == "zwölf")
gen occupation_order = -occ

* Write the cleaned data to the temp folder, overwriting any existing file
save "$temp/robust.dta", replace

  